/************************************************************************
Author: 	Kaixin Wang
Project: 	ETS
Purpose: 	Check consistency between inventory and trading data

Date created: 	30 March 2022
Version: 		STATA 17 MP

Last edited: 	10 June 2022
Edited by: 		Kaixin	
*************************************************************************/

set more off
clear all
pause on

********************************************************************************
*** Check NeML inventory daily data
********************************************************************************

use "$TRADING_DATA_OUT/inventory_plant-commodity-date.dta", replace
merge m:1 gpcb_id using "$TRADING_DATA_OUT/index_plant.dta"
keep if _merge == 3
drop _merge 

order date commodity gpcb_id allocated_quantity purchased sold consumed current_inventory
sort date commodity gpcb_id

preserve
keep if allocated_quantity + purchased - sold - consumed != current_inventory
export excel using "$TRADING_DATA_OUT/inventory_to_check.xlsx", firstrow(variables) replace
restore

********************************************************************************
*** Check consistency between daily inventory and daily trading data
********************************************************************************

* prepare daily inventory-trading data set

use "$TRADING_DATA_OUT/record_plant-commodity-date-bid.dta", replace
drop purchased_period sold_period consumed_period blocked_period final_inventory_period trade_qty_cum permit_holding allocated_quantity
drop if trade_qty == .
keep gpcb_id commodity date commodity_period trade_qty
gen purchase_qty = 0
gen sale_qty = 0
replace purchase_qty = trade_qty if trade_qty > 0
replace sale_qty = -trade_qty if trade_qty < 0
collapse (sum) purchase_qty sale_qty, by(gpcb_id commodity commodity_period date)

merge 1:1 gpcb_id commodity date using "$TRADING_DATA_OUT/inventory_plant-commodity-date.dta"
sort gpcb_id commodity_period date
drop _merge

merge m:1 gpcb_id using "$TRADING_DATA_OUT/index_plant.dta"
keep if _merge == 3
drop _merge 

sort gpcb_id commodity_period date
replace purchase_qty = 0 if missing(purchase_qty)
replace sale_qty = 0 if missing(sale_qty)
by gpcb_id commodity_period: gen purchase_qty_cum = sum(purchase_qty)
by gpcb_id commodity_period: gen sale_qty_cum = sum(sale_qty)

order gpcb_id commodity commodity_period date purchase_qty sale_qty allocated_quantity purchase_qty_cum purchased sale_qty_cum sold consumed current_inventory

* flag inconsistencies

gen is_consistent = 1
replace is_consistent = 0 if purchase_qty_cum != purchased
replace is_consistent = 0 if sale_qty_cum != sold

tab is_consistent
// is_consiste |
//          nt |      Freq.     Percent        Cum.
// ------------+-----------------------------------
//           0 |        986        2.41        2.41
//           1 |     39,992       97.59      100.00
// ------------+-----------------------------------
//       Total |     40,978      100.00


* Errors might be carried over to rest of the period.
* We figure out the first instance of each inconsistency.

by gpcb_id commodity_period: gen final_day = (_n == _N)

gen inconsistent_period = 0
replace inconsistent_period = 1 if final_day == 1 & is_consistent == 0
by gpcb_id commodity_period: egen sum_inconsistent_period = sum(inconsistent_period)
drop inconsistent_period final_day
rename sum_inconsistent_period inconsistent_period

keep if inconsistent_period == 1

foreach var of varlist*{
	
	if !inlist("`var'","gpcb_id","commodity","commodity_period","date","pruchase_qty","sale_qty"){
		gen dif_`var' = abs(`var'[_n]-`var'[_n-1])
		replace dif_`var' = 0 if missing(dif_`var')
	}
	
}

egen group = group(gpcb_id commodity)
gen dif_group = group[_n]-group[_n-1]
replace dif_group = 1 if missing(dif_group)

egen dif = rowtotal(dif*)
drop if dif == 0
drop dif* group inconsistent_period

// keep gpcb_id commodity commodity_period
// duplicates drop
// tab gpcb_id
// pause
